3  Results

3.1 Loading the data


Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

######################### Warning from 'xts' package ##########################
#                                                                             #
# The dplyr lag() function breaks how base R's lag() function is supposed to  #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
# source() into this session won't work correctly.                            #
#                                                                             #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
# dplyr from breaking base R's lag() function.                                #
#                                                                             #
# Code in packages is not affected. It's protected by R's namespace mechanism #
# Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
#                                                                             #
###############################################################################

Attaching package: 'xts'
The following objects are masked from 'package:dplyr':

    first, last
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ lubridate 1.9.3     ✔ readr     2.1.5
✔ purrr     1.0.2     ✔ tibble    3.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ xts::first()    masks dplyr::first()
✖ dplyr::lag()    masks stats::lag()
✖ xts::last()     masks dplyr::last()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
# Load the combined annual energy workbook.
energy_data_annual <- read_xlsx(path = "./data_source/combined_annual_data.xlsx")

# Remove every ".x" and then every ".y" occurrence from the column names
# (presumably suffixes left over from an earlier merge/join -- confirm).
# str_replace_all() is vectorised, so it is applied to the whole name vector
# at once instead of one name at a time through sapply().
colnames(energy_data_annual) <- colnames(energy_data_annual) |>
  str_replace_all("\\.x", "") |>
  str_replace_all("\\.y", "")

energy_data_annual$Year <- as.integer(energy_data_annual$Year)

# Stripping the markers can leave several columns with the same name;
# keep only the first column for each name.
energy_data_annual <- energy_data_annual[, !duplicated(colnames(energy_data_annual))]
energy_data_annual
Code
# Load the combined monthly energy workbook.
energy_data_monthly <- read_xlsx(path = "./data_source/combined_monthly_data.xlsx")

# Remove every ".x" and then every ".y" occurrence from the column names
# (presumably suffixes left over from an earlier merge/join -- confirm).
# str_replace_all() is vectorised, so it is applied to the whole name vector
# at once instead of one name at a time through sapply().
colnames(energy_data_monthly) <- colnames(energy_data_monthly) |>
  str_replace_all("\\.x", "") |>
  str_replace_all("\\.y", "")

# as.Date() is masked by zoo in this session (see the attach messages).
energy_data_monthly$Month <- as.Date(energy_data_monthly$Month)

# Stripping the markers can leave several columns with the same name;
# keep only the first column for each name.
energy_data_monthly <- energy_data_monthly[, !duplicated(colnames(energy_data_monthly))]
energy_data_monthly

3.2 Energy Production and Consumption Overview

3.2.1 Primary Energy Production

Code
# Annual primary energy production, one line per source
# (fossil fuels, nuclear, renewables). The constant strings mapped to `color`
# become the legend keys that scale_color_manual() assigns colours to.
ggplot(energy_data_annual, aes(x = Year)) +
  # `linewidth` replaces `size` for line geoms (`size` is deprecated for lines
  # since ggplot2 3.4.0).
  geom_line(
    aes(
      y = `Total Fossil Fuels Production (Quadrillion Btu)`,
      color = "Fossil Fuels Production"
    ),
    linewidth = 1
  ) +
  geom_line(
    aes(
      y = `Nuclear Electric Power Production (Quadrillion Btu)`,
      color = "Nuclear Power Production"
    ),
    linewidth = 1
  ) +
  geom_line(
    aes(
      y = `Total Renewable Energy Production (Quadrillion Btu)`,
      color = "Renewable Energy Production"
    ),
    linewidth = 1
  ) +
  labs(
    title = "Primary Energy Production",
    x = "Year",
    y = "Production (Quadrillion Btu)",
    # The agency is the U.S. Energy Information Administration (EIA).
    caption = "Data Source: U.S. Energy Information Administration"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "darkblue"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank()
  ) +
  scale_color_manual(
    values = c(
      "Fossil Fuels Production" = "red",
      "Nuclear Power Production" = "blue",
      "Renewable Energy Production" = "green"
    )
  )

The graph illustrates the trend in primary energy production from 1950 to 2020. Initially, there is a steady increase from approximately 28 quadrillion Btu in 1950 to 59 quadrillion Btu by 1970. This is followed by a plateau in production from 1970 to 2010. After 2010, there is a noticeable spike in production, which may be attributed to rising demand from energy-intensive, high-performance computing in large data centers.

Fossil Fuels Production : This line shows a significant increase over the years, indicating a substantial rise in energy production. It suggests that this energy source has been the dominant contributor to primary energy production.

Nuclear and Renewable Energy Production : These lines remain relatively flat compared to fossil fuels, indicating that these energy sources have contributed less to overall primary energy production. They show slight increases over time, but the growth is not as pronounced as that of fossil fuels.

3.2.2 Primary Energy Consumption

Code
# Annual primary energy consumption, one line per source
# (fossil fuels, nuclear, renewables). The constant strings mapped to `color`
# become the legend keys that scale_color_manual() assigns colours to.
ggplot(energy_data_annual, aes(x = Year)) +
  # `linewidth` replaces `size` for line geoms (`size` is deprecated for lines
  # since ggplot2 3.4.0).
  geom_line(
    aes(
      y = `Total Fossil Fuels Consumption (Quadrillion Btu)`,
      color = "Fossil Fuels Consumption"
    ),
    linewidth = 1
  ) +
  geom_line(
    aes(
      y = `Nuclear Electric Power Consumption (Quadrillion Btu)`,
      color = "Nuclear Power Consumption"
    ),
    linewidth = 1
  ) +
  geom_line(
    aes(
      y = `Total Renewable Energy Consumption (Quadrillion Btu)`,
      color = "Renewable Energy Consumption"
    ),
    linewidth = 1
  ) +
  labs(
    title = "Primary Energy Consumption",
    x = "Year",
    y = "Consumption (Quadrillion Btu)",
    # The agency is the U.S. Energy Information Administration (EIA).
    caption = "Data Source: U.S. Energy Information Administration"
  ) +
  scale_color_manual(
    values = c(
      "Fossil Fuels Consumption" = "red",
      "Nuclear Power Consumption" = "blue",
      "Renewable Energy Consumption" = "green"
    )
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "darkblue"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank()
  )

Fossil Fuels Consumption : This is the dominant source of energy consumption throughout the period. There is a steady increase from 1950 to around 2005, with some fluctuations. After 2005, the consumption plateaus with minor ups and downs.

Nuclear Power Consumption : Nuclear consumption starts to become noticeable around the late 1960s and early 1970s. It shows gradual growth until about 2000, after which it stabilizes.

Renewable Energy Consumption : This energy consumption begins to rise noticeably in the late 1990s. It shows a steady increase, especially post-2000, and appears to be catching up with nuclear power by the end of the period.

Fossil fuels make up the bulk of energy consumption throughout the period. The other sources, nuclear and renewable energy, contribute comparatively little. Serious investment in these energy sources is needed for them to catch up and to reduce the dependence on fossil fuels.

3.2.3 Primary Energy Imports and Exports

Code
# Annual primary energy net imports as a bar chart, one bar per year.
ggplot(
  energy_data_annual,
  aes(x = Year, y = `Primary Energy Net Imports (Quadrillion Btu)`)
) +
  # geom_col() draws bars whose heights are the y values themselves; it is the
  # direct equivalent of geom_bar(stat = "identity").
  geom_col(fill = "orange", color = "black") +
  labs(
    title = "Primary Energy Net Imports",
    x = "Year",
    y = "Energy (Quadrillion Btu)",
    # The agency is the U.S. Energy Information Administration (EIA).
    caption = "Data Source: U.S. Energy Information Administration"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "darkblue")
  )

The chart shows the net imports of primary energy into the United States over time.

Observations :

  • 1950s to Early 1970s: The net energy imports were relatively low and stable. This period shows minimal dependency on energy imports.

  • Mid-1970s to Early 1980s: There was a noticeable increase in net energy imports, likely due to rising energy demands and geopolitical events affecting oil supply. Since the 1970s, the global oil trade has been predominantly conducted in U.S. dollars (USD), creating a symbiosis between America’s currency and the world’s most traded commodity. The petrodollar emerged as an economic concept in the 1970s as growing U.S. imports of increasingly costly crude oil increased the dollar holdings of foreign producers.

  • 1980s to Early 2000s: A significant rise in net imports occurred, peaking around the mid-2000s. This reflects increased energy consumption and reliance on foreign energy sources. The U.S. experienced growing energy demands driven by economic expansion and technological advancements. This led to higher consumption of oil and natural gas. The U.S. became increasingly reliant on foreign oil, with imports rising significantly.

  • Mid-2000s to Present: There is a sharp decline in net imports, eventually turning negative. This indicates that the U.S. became a net exporter of primary energy. Factors contributing to this include increased domestic energy production (especially from shale gas and oil), improved energy efficiency, and shifts towards renewable energy sources. (Source: U.S. Energy Independence)

Overall, the chart illustrates a transition from high dependency on imported energy to a position where the U.S. exports more energy than it imports.

3.2.4 Energy Imports vs Energy Consumption

Code
# Monthly primary energy imports and total primary energy consumption,
# drawn as two lines on a shared time axis.
ggplot(energy_data_monthly, aes(x = Month)) +
  # `linewidth` replaces `size` for line geoms (`size` is deprecated for lines
  # since ggplot2 3.4.0).
  geom_line(
    aes(
      y = `Primary Energy Imports (Quadrillion Btu)`,
      color = "Primary Energy Imports"
    ),
    linewidth = 0.5
  ) +
  geom_line(
    aes(
      y = `Total Primary Energy Consumption (Quadrillion Btu)`,
      color = "Total Primary Energy Consumption"
    ),
    linewidth = 0.5
  ) +
  labs(
    title = "Energy Consumption and Energy Imports",
    x = "Timeline",
    y = "Energy (Quadrillion Btu)",
    # The agency is the U.S. Energy Information Administration (EIA).
    caption = "Data Source: U.S. Energy Information Administration"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "darkblue"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank()
  )

Code
# Pearson correlation between annual imports and annual total consumption,
# shown in the plot subtitle. `use = "complete.obs"` restricts the calculation
# to years where both values are present, so a single missing value no longer
# turns the subtitle into "NA"; with complete data the result is unchanged.
import_consumption_cor <- cor(
  energy_data_annual$`Primary Energy Imports (Quadrillion Btu)`,
  energy_data_annual$`Total Primary Energy Consumption (Quadrillion Btu)`,
  method = "pearson",
  use = "complete.obs"
)

# Scatter plot of consumption against imports with a fitted linear trend
# (method = "lm") and its confidence band (se = TRUE).
ggplot(
  energy_data_annual,
  aes(
    x = `Primary Energy Imports (Quadrillion Btu)`,
    y = `Total Primary Energy Consumption (Quadrillion Btu)`
  )
) +
  geom_point(color = "blue", size = 1) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(
    title = "Energy Dependency Analysis",
    x = "Primary Energy Imports (Quadrillion Btu)",
    y = "Total Primary Energy Consumption (Quadrillion Btu)",
    subtitle = paste(
      "Pearson Correlation Coefficient:",
      round(import_consumption_cor, 2)
    ),
    # The agency is the U.S. Energy Information Administration (EIA).
    caption = "Data Source: U.S. Energy Information Administration"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", color = "darkblue"),
    plot.subtitle = element_text(hjust = 0.5, color = "purple")
  )
`geom_smooth()` using formula = 'y ~ x'

The two graphs provide a comprehensive analysis of energy consumption and import patterns over time and reveal important trends and relationships.

Consumption Trends
Total Primary Energy Consumption shows a steady upward trajectory, from around 5 Quadrillion Btu per month in January 1973 to approximately 7.5 Quadrillion Btu per month in August 2024. Notable seasonal fluctuations appear throughout the timeline, with regular peaks and troughs. The overall consumption pattern demonstrates consistent growth despite short-term variations.

Import Patterns
Primary Energy Imports started at roughly 1.5 Quadrillion Btu each month in the 1970s. Imports peaked around 2005-2010 at approximately 3 Quadrillion Btu each month. A notable decline in imports occurred after 2010, stabilizing at about 2 Quadrillion Btu each month by 2020.

Statistical Relationship
The scatter plot reveals a strong positive correlation between imports and consumption. The Pearson Correlation Coefficient of 0.95 indicates an extremely strong linear relationship between consumption and imports. The narrow confidence band (gray shading) around the fitted line suggests that the linear trend is estimated precisely. The regression line shows a clear positive slope, indicating that higher imports generally correspond to higher consumption. Data points cluster tightly around the regression line, particularly in the middle range. The relationship remains consistent across different levels of imports and consumption.

Key Insights
- Despite growing total energy consumption, there’s a decreasing reliance on imports in recent years.
- The gap between consumption and imports has widened over time, suggesting increased domestic energy production or diversification of energy sources.
- The seasonal variations in consumption are more pronounced than fluctuations in imports, indicating stable import patterns despite varying demand.

3.3 Sectorwise Energy Consumption Analysis

3.3.1 Energy Overview by Residential Sector

Code
# Residential sector: wrap the monthly consumption and electrical-system-loss
# columns as xts series ordered by the Month column.
rs_energy_consumed <- xts(
  x = energy_data_monthly$`Total Energy Consumed by the Residential Sector (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)
rs_energy_loss <- xts(
  x = energy_data_monthly$`Residential Sector Electrical System Energy Losses (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)

# The dySeries() calls below refer to the two series by the variable names
# passed to cbind().
residential_series <- cbind(rs_energy_consumed, rs_energy_loss)

# Interactive stacked time-series chart with a range selector.
dygraph(
  residential_series,
  main = "Energy Consumed vs Energy Loss (Residential Sector)"
) |>
  dySeries("rs_energy_consumed", label = "Energy Consumed") |>
  dySeries("rs_energy_loss", label = "Energy Loss") |>
  dyRangeSelector() |>
  dyOptions(stackedGraph = TRUE, drawPoints = TRUE, pointSize = 2) |>
  dyAxis("x", label = "Timeline") |>
  dyAxis("y", label = "Energy (Trillion Btu)")

3.3.2 Energy Overview by Commercial Sector

Code
# Commercial sector: wrap the monthly consumption and electrical-system-loss
# columns as xts series ordered by the Month column.
# NOTE(review): the `rs_` variable names are carried over from the residential
# section and are overwritten here; they hold commercial-sector data.
rs_energy_consumed <- xts(
  x = energy_data_monthly$`Total Energy Consumed by the Commercial Sector (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)
rs_energy_loss <- xts(
  x = energy_data_monthly$`Commercial Sector Electrical System Energy Losses (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)

# The dySeries() calls below refer to the two series by the variable names
# passed to cbind().
commercial_series <- cbind(rs_energy_consumed, rs_energy_loss)

# Interactive stacked time-series chart with a range selector.
dygraph(
  commercial_series,
  main = "Energy Consumed vs Energy Loss (Commercial Sector)"
) |>
  dySeries("rs_energy_consumed", label = "Energy Consumed") |>
  dySeries("rs_energy_loss", label = "Energy Loss") |>
  dyRangeSelector() |>
  dyOptions(stackedGraph = TRUE, drawPoints = TRUE, pointSize = 2) |>
  dyAxis("x", label = "Timeline") |>
  dyAxis("y", label = "Energy (Trillion Btu)")

3.3.3 Energy Overview by Industrial Sector

Code
# Industrial sector: wrap the monthly consumption and electrical-system-loss
# columns as xts series ordered by the Month column.
# NOTE(review): the `rs_` variable names are carried over from the residential
# section and are overwritten here; they hold industrial-sector data.
rs_energy_consumed <- xts(
  x = energy_data_monthly$`Total Energy Consumed by the Industrial Sector (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)
rs_energy_loss <- xts(
  x = energy_data_monthly$`Industrial Sector Electrical System Energy Losses (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)

# The dySeries() calls below refer to the two series by the variable names
# passed to cbind().
industrial_series <- cbind(rs_energy_consumed, rs_energy_loss)

# Interactive stacked time-series chart with a range selector.
dygraph(
  industrial_series,
  main = "Energy Consumed vs Energy Loss (Industrial Sector)"
) |>
  dySeries("rs_energy_consumed", label = "Energy Consumed") |>
  dySeries("rs_energy_loss", label = "Energy Loss") |>
  dyRangeSelector() |>
  dyOptions(stackedGraph = TRUE, drawPoints = TRUE, pointSize = 2) |>
  dyAxis("x", label = "Timeline") |>
  dyAxis("y", label = "Energy (Trillion Btu)")

3.3.4 Energy Overview by Transportation Sector

Code
# Transportation sector: wrap the monthly consumption and proportioned
# electrical-system-loss columns as xts series ordered by the Month column.
# NOTE(review): the `rs_` variable names are carried over from the residential
# section and are overwritten here; they hold transportation-sector data.
rs_energy_consumed <- xts(
  x = energy_data_monthly$`Total Energy Consumed by the Transportation Sector (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)
rs_energy_loss <- xts(
  x = energy_data_monthly$`Electrical System Energy Losses Proportioned to the Transportation Sector (Trillion Btu)`,
  order.by = energy_data_monthly$Month
)

# The dySeries() calls below refer to the two series by the variable names
# passed to cbind().
transportation_series <- cbind(rs_energy_consumed, rs_energy_loss)

# Interactive stacked time-series chart with a range selector.
dygraph(
  transportation_series,
  main = "Energy Consumed vs Energy Loss (Transportation Sector)"
) |>
  dySeries("rs_energy_consumed", label = "Energy Consumed") |>
  dySeries("rs_energy_loss", label = "Energy Loss") |>
  dyRangeSelector() |>
  dyOptions(stackedGraph = TRUE, drawPoints = TRUE, pointSize = 2) |>
  dyAxis("x", label = "Timeline") |>
  dyAxis("y", label = "Energy (Trillion Btu)")